/*******************************************************************************
 Data preparation for merged dataset
* 1. Merge individual files
* 2. Create variables that require the merged dataset
* 3. Some further recoding and labeling 
* 4. Save as owwa_data_merged
* 5. Reshape data to long format
* 6. Create STE and Index variables
* 7. Save as owwa_data_merged_long
*******************************************************************************/

*******************************************************************************/
* 1. Merge individual files
* The baseline file is the master data; every merge below matches
* one-to-one on the identifier rno.
use "$data_prep/owwa_hsw_baseline_edited", clear

* Wave 2 household file. The merge indicator is turned into a 0/1 flag:
* 3 (in both files) -> 1, 1 (master only) -> 0. Code 2 (using only) is
* left unchanged by the recode.
merge 1:1 rno using "$data_prep/owwa_hh_w2_edited", gen(proxy_2)
recode proxy_2 (3=1) (1=0)
lab var proxy_2 "Proxy interview in wave 2" 
lab val proxy_2 l_yesno

* Wave 3 direct interview file, same 0/1 conversion of the merge indicator.
merge 1:1 rno using "$data_prep/owwa_hsw_w3_edited", gen(direct_3)
recode direct_3 (3=1) (1=0)
lab var direct_3 "Direct interview in wave 3" 
lab val direct_3 l_yesno

* Wave 3 proxy interview file. With the -update- option a matched
* observation is reported as 3 (match), 4 (match, missing values updated)
* or 5 (match, conflicting nonmissing values). All three mean "found in
* the proxy file", so all three are mapped to 1; previously code 4 was
* left as 4 and such rows never satisfied proxy_3==1.
merge 1:1 rno using "$data_prep/owwa_proxy_w3_edited", gen(proxy_3) update
recode proxy_3 (3 4 5=1) (1=0)
lab var proxy_3 "Proxy interview in wave 3" 
lab val proxy_3 l_yesno

* Wave 3 household file; no merge indicator is kept.
merge 1:1 rno using "$data_prep/owwa_hh_w3_edited", nogen update

*******************************************************************************/
* 2. Create variables that require the merged dataset
* Status variable requires merged data
* Wave 2 status: 1 = answer to w2 question "is currently abroad = yes", 0 = anything else
generate status_2 = (hh_b0_current_abroad_2 == 1)

* Wave 3 status as reported in the household interview.
* Codes 1/2: current location is 2 or 3, split by employer change
* (5 cases with missing information on employer change are coded as
* staying with the initial employer).
generate hh_status_3 = 1 if inlist(hh_a5a_current_loc_3, 2, 3) & (hh_a2_change_emp_3 == 0 | missing(hh_a2_change_emp_3))
replace  hh_status_3 = 2 if inlist(hh_a5a_current_loc_3, 2, 3) & hh_a2_change_emp_3 == 1
* Codes 3/4: current location is 1 or 4 and hh_a5c_loc_ph_3 is 1 or 2.
replace  hh_status_3 = 3 if hh_a5a_current_loc_3 == 1 & inlist(hh_a5c_loc_ph_3, 1, 2)
replace  hh_status_3 = 4 if hh_a5a_current_loc_3 == 4 & inlist(hh_a5c_loc_ph_3, 1, 2)
* Code 5: never worked in HK/KSA
replace  hh_status_3 = 5 if hh_a5c_loc_ph_3 == 3

* Combined wave 3 status: h_status_3 where available, otherwise the
* household report.
generate status_3 = cond(missing(h_status_3), hh_status_3, h_status_3)

label define l_status 1 "Abroad with initial employer" 2 "Abroad with different employer" 3 "Back in the Philippines" 4 "In other country after being in HK/KSA" 5 "Never went to HK/KSA"
label values status_3 h_status_3 hh_status_3 l_status
label variable hh_status_3 "Status at the time of wave 3 hh interview"
label variable status_3 "Status at the time of wave 3 interview (any)"

* One indicator variable per status category (status_3_b1, ..., hh_status_3_b1, ...)
foreach s in status_3 hh_status_3 {
	tabulate `s', gen(`s'_b)
}


* Duration since PDOS

* Note that we use duration since PDOS instead of duration since departure,
* because departure might be endogenous.
* Every duration is an interview date minus h_date_interview_1, together
* with its natural log.

* Wave 2 proxy interview
generate duration_2  = hh_date_interview_2 - h_date_interview_1
generate lduration_2 = ln(duration_2)
label variable duration_2 "Days since PDOS"
label variable lduration_2 "Days since PDOS (log)"

* Wave 3 direct or proxy interview
generate duration_3 = h_dateofinterview_3 - h_date_interview_1
replace  duration_3 = hh_dateofinterview_3 - h_date_interview_1 if missing(duration_3) // for proxy interviews
generate lduration_3 = ln(duration_3)
label variable duration_3 "Days since PDOS"
label variable lduration_3 "Days since PDOS (log)"

* Wave 3 imputed duration: the wave 3 value, or the wave 2 value when
* wave 3 is missing
generate i_duration_3  = cond(missing(duration_3), duration_2, duration_3)
generate li_duration_3 = ln(i_duration_3)
label variable i_duration_3 "Days since PDOS"
label variable li_duration_3 "Days since PDOS (log)"

* Wave 3 household interview
generate hh_duration_3  = hh_dateofinterview_3 - h_date_interview_1
generate lhh_duration_3 = ln(hh_duration_3)
label variable hh_duration_3 "HH int 3 - Days since PDOS"
label variable lhh_duration_3 "HH int 3 - Days since PDOS (log)"

* Wave 1 household baseline interview
generate hh_duration_1  = hh_date_interview_1 - h_date_interview_1
generate lhh_duration_1 = ln(hh_duration_1)
label variable hh_duration_1 "HH int 1 - Days since PDOS"
label variable lhh_duration_1 "HH int 1 - Days since PDOS (log)"

* Wave 3 account indicator split by the value of account:
* n_account_3 is defined only where account==0, o_account_3 only where
* account==1.
generate n_account_3 = account_3 if account == 0
generate o_account_3 = account_3 if account == 1
label variable n_account_3 "New personal savings account"
label variable o_account_3 "Still has personal savings account"
label values n_account_3 o_account_3 l_yesno

* Indicators whether household respondent is the same as in baseline
* Wave 2: 1 if hh_nature_respo_2 is 1, 0 for larger nonmissing values,
* missing otherwise.
generate hh_sameresp_2 = (hh_nature_respo_2 == 1) if hh_nature_respo_2 >= 1 & hh_nature_respo_2 < .
* Wave 3: 1 if hh_a1_baseline_respo_3 is 1, 0 if it is 2, missing otherwise.
generate hh_sameresp_3 = (hh_a1_baseline_respo_3 == 1) if inlist(hh_a1_baseline_respo_3, 1, 2)

* Recode system missing (.) to the extended missing value .c in every
* numeric variable that exists at this point. Later steps in this file
* test for .c explicitly.
* The variable list is copied into the macro directly rather than
* evaluated as a string expression with "=", so that a long list cannot
* be truncated by string-expression length limits.
quietly ds, has(type numeric)
local x `r(varlist)'
recode `x' (.=.c) 


* Generate binary variable for low leisure: 1 if sparetime is below 8,
* 0 otherwise, left missing where sparetime is missing.
* NOTE(review): these variables are created after the .->.c recode, so
* their missing values are system missing (.) rather than .c - confirm
* this is intended.
forvalues w = 2/3 {
	generate lowleisure_`w' = (sparetime_`w' < 8) if !missing(sparetime_`w')
}

* Impute missings in wave 3 variables with data from wave 2.
* For each stub an i_<stub>3 copy of the wave 3 variable is created and
* missing values are filled from the wave 2 variable. The fill runs under
* -capture-, so it is skipped silently if it raises an error (for example
* when the wave 2 variable does not exist).
local w3_stubs text_freq_ call_freq_ call_length_ noshouting_ noviolence_ nothreat_ ///
	nosexharass_ enoughfood_ work_start_ work_end_ workhours_ worktime_ sparetime_ lowleisure_ ///
	restday_ leavehouse_ salary_ontime_ sal_noded_ no_worksick_ employer_good_ employer_short_ employer_slaver_ ///
	employer_violent_ employer_strict_ noemployer_short_ noemployer_slaver_ noemployer_violent_ noemployer_strict_
foreach stub of local w3_stubs {
	generate i_`stub'3 = `stub'3
	capture replace i_`stub'3 = `stub'2 if missing(i_`stub'3) & !missing(`stub'2)
}

*******************************************************************************/
* 3. Some further recoding and labeling 

* Relabel variables
* Each variable is labelled exactly once. hh_fam_sav_3 used to be labelled
* twice; only the later "HH: Family savings (USD)" label ever took effect,
* so the earlier, overwritten statement has been removed.

* Communication
lab var text_freq_2 "Text frequency (in a week)"
lab var call_length_2 "Length of phone conversation (in minutes)"
* call_freq_2 previously carried a copy of the call_length_2 label.
* NOTE(review): confirm the reference period of the call frequency
* question and add it to the label if appropriate.
lab var call_freq_2 "Frequency of phone calls"

* Working conditions
lab var leavehouse_2 "DW can leave the employer's house"
lab var leavehouse_3 "DW can leave the employer's house"
lab var restday_1 "DW has rest day once a week"
lab var restday_2 "DW has rest day once a week"
lab var restday_3 "DW has rest day once a week"
lab var salary_ontime_2 "DW receives her salary on time"
lab var salary_ontime_3 "DW receives her salary on time"
lab var sal_noded_2 "DW did not experience salary deduction"
lab var sal_noded_3 "DW did not experience salary deduction"
lab var employer_good_2 "Employer is described as good or ok"
lab var employer_good_3 "Employer is described as good or ok"

* Savings and financial coordination
lab var total_savings_2 "Total amount of savings (in USD)"
lab var total_savings_3 "Total amount of savings (in USD)"
lab var hsw_savings_2 "DW savings (USD)"
lab var hsw_savings_3 "DW savings (USD)"
lab var fam_savings_2 "Family savings (USD)"
lab var coord_success_2 "Successful financial coordination between DW and family"
lab var coord_attempt_2 "Financial coordination between DW and family"

* Status and plans
lab var status_3_b1 "Abroad with initial employer"
lab var status_3_b3 "Back in the Philippines"
lab var stay_ph_3 "DW plans to stay in the Philippines"
lab var hh_stay_ph_3 "DW plans to stay in the Philippines"
lab var child_ofw_3 "DW thinks good for children to become OFW"
lab var stay_employer_3 "DW plans to continue working for current employer"
lab var hh_stay_employer_3 "DW plans to continue working for current employer"

* Household-reported savings and remittances
lab var hh_total_sav_3 "HH: Total savings (USD)"
lab var hh_fam_sav_3 "HH: Family savings (USD)"
lab var hh_hsw_sav_3 "HH: DW savings (USD)"
lab var hh_am_remit_3 "HH: Remittances most recent transaction (USD)"

* Wellbeing items
lab var happy_1 "Happier"
lab var happy_3 "Happier"
lab var nervous_1 "Less nervous"
lab var nervous_3 "Less nervous"
lab var downheart_1 "Less down hearted and blue"
lab var downheart_3 "Less down hearted and blue"
lab var pain_1 "Less pain"
lab var pain_3 "Less pain"
lab var homesick_3 "Less homesick"
lab var overwhelm_3 "Less overwhelmed"

* Generate awareness and knowledge of gift treatment by household.
* For each of the two items (remember_gift, gave_gift):
*   - rows with a wave 2 proxy interview get 1 if the wave 2 answer is 1,
*     0 otherwise;
*   - rows with a wave 3 proxy interview are then overwritten with the
*     same 0/1 coding of the wave 3 answer;
*   - all other rows stay missing.
foreach item in remember gave {
	generate hh_`item'_gift = (`item'_gift_2 == 1) if proxy_2 == 1
	replace  hh_`item'_gift = (`item'_gift_3 == 1) if proxy_3 == 1
}
label variable hh_remember_gift "HH remembers that DW received mangoes"
label variable hh_gave_gift "HH remembers that DW gave gift to employer"

* Final sample: keep only observations with nonmissing finlit and
* gift_assigned (drops 5 observations where DWs changed class last minute
* or were not on registration lists)
drop if missing(finlit, gift_assigned)

* Use the assigned gift treatment variable, including cases where the
* interviewer recording deviated from the assignment list
replace gift = gift_assigned

compress

save "$data_prep/owwa_data_merged", replace

********************************************************************************
* 4. Save as owwa_data_merged

* Reload the merged wide file as the starting point for the long format.
use "$data_prep/owwa_data_merged", clear

********************************************************************************
* 5. Reshape data to long format

	* shorten some variable names
	rename hh_fe11_disagree_remit_1 hh_disagree_use_1
	rename hh_d13_extra_remittance_2 hh_fam_extra_mon_2

	* salary_ontime -> sal_ontime
	foreach v in salary_ontime_3 salary_ontime_2 i_salary_ontime_3 {
		local short : subinstr local v "salary" "sal"
		rename `v' `short'
	}

	* employer -> empl in every variable whose name contains "employer_"
	foreach v of varlist *employer_* {
		local short : subinstr local v "employer" "empl", all
		rename `v' `short'
	}


* List of outcome stubs
* Each entry is a variable-name stub ending in "_"; the wave number that
* follows the stub in the wide data becomes the value of wave after the
* reshape. The local is also used by the standardization loop in section 6.
local outcomes "lduration_ li_duration_ remember_gift_ gave_gift_ text_freq_ call_freq_ call_length_ noshouting_ noviolence_ nothreat_ nosexharass_ no_worksick_ enoughfood_ work_start_ work_end_ workhours_ worktime_ sparetime_ lowleisure_ restday_ leavehouse_ sal_ontime_ sal_noded_ med_treat_ empl_good_ empl_short_ empl_slaver_ empl_violent_ empl_strict_ noempl_short_ noempl_slaver_ noempl_violent_ noempl_strict_ empl_subj_index_ empl_rel_ empl_rel_now_ empl_rel_av_ total_savings_ l_total_sav_ any_hsw_savings_ hsw_savings_ l_hsw_savings_ any_fam_savings_ fam_savings_ l_fam_savings_ share_hsw_sav_ freq_remit_ amount_remit_ l_am_remit_ l_remit_total_ hh_efftot_ b_hh_efftot_ b_hh_financial_ b_hh_stdliving_ b_hh_housing_ b_hh_health_ b_hh_educ_ b_hh_famlife_ b_hh_soclife_ dis_budget_  sent_more_ fam_extra_mon_ hh_disagree_use_ hh_fam_extra_mon_ remit_bank_ i_text_freq_ i_call_freq_ i_call_length_ i_noshouting_ i_noviolence_ i_nothreat_ i_nosexharass_ i_enoughfood_ i_work_start_ i_work_end_ i_workhours_ i_worktime_ i_sparetime_ i_lowleisure_ i_restday_ i_leavehouse_ i_sal_ontime_ i_sal_noded_ i_no_worksick_ i_empl_good_ i_empl_short_ i_empl_slaver_ i_empl_violent_ i_empl_strict_ i_noempl_short_ i_noempl_slaver_ i_noempl_violent_ i_noempl_strict_"

* Wave 3 wellbeing variables. They are not reshaped (they keep their _3
* suffix) and are only used by the standardization loop in section 6.
local well_3 "migrant_well_3 mental_health_3 happy_3 nervous_3 downheart_3 pain_3 homesick_3 overwhelm_3"

* One row per rno and wave. Variables not listed in `outcomes' stay as
* they are and are repeated on every row of the same rno.
mi reshape long `outcomes', i(rno) j(wave)

* Interview type of each row:
*   1 = wave 2 row with a wave 2 proxy interview
*   2 = wave 3 row with a wave 3 proxy interview
*   3 = wave 3 row with a wave 3 direct interview (overrides 2 when both apply)
*   4 = every other row
gen type_int = 1 if proxy_2==1 & wave==2
replace type_int = 2 if proxy_3==1 & wave==3
replace type_int = 3 if direct_3==1 & wave==3
replace type_int = 4 if type_int==.

********************************************************************************
* 6. Create STE and Index variables
	
* For the standardized treatment effect
* For every outcome stub and every wave 3 wellbeing variable <v> the loop
* creates, in this order:
*   pos_<v>    copy of <v>, with .c in proxy-interview rows replaced by 1
*              (missings in proxy interviews set to the favorable outcome)
*   pos_s_<v>  placeholder, set to .c
*   neg_<v>    copy of <v>, with .c in proxy-interview rows replaced by 0
*              (missings in proxy interviews set to the unfavorable outcome)
*   neg_s_<v>  placeholder, set to .c
*   s_<v>      <v> standardized within wave 2 and wave 3 using the mean
*              and sd of rows with gift==0, restricted to status_3!=5;
*              all other rows stay .c
* A proxy-interview row is a wave 3 row with proxy_3==1 or a wave 2 row
* with proxy_2==1.
* NOTE(review): only values equal to .c are replaced in pos_/neg_; system
* missing (.) values are left untouched - confirm this is intended.
local proxyrow "((proxy_3==1 & wave==3) | (proxy_2==1 & wave==2))"
foreach v in `outcomes' `well_3' {
	generate pos_`v' = `v'
	replace  pos_`v' = 1 if `v'==.c & `proxyrow'
	generate pos_s_`v' = .c
	generate neg_`v' = `v'
	replace  neg_`v' = 0 if `v'==.c & `proxyrow'
	generate neg_s_`v' = .c
	generate s_`v' = .c
	* standardize within waves
	forvalues w = 2/3 {
		summarize `v' if gift==0 & wave==`w' & status_3!=5
		replace s_`v' = (`v' - r(mean))/r(sd) if wave==`w' & status_3!=5
	}
}


* Item stubs that enter the indices below
local comm_items text_freq_ call_freq_ call_length_
local treat_head noshouting_ noviolence_ nothreat_ nosexharass_ no_worksick_ enoughfood_
local treat_tail restday_ leavehouse_ sal_ontime_ sal_noded_

* For each group build the list of standardized items (s_<stub>) and the
* list of standardized wave-2-imputed items (s_i_<stub>)
foreach grp in comm_items treat_head treat_tail {
	local s_`grp'
	local si_`grp'
	foreach stub of local `grp' {
		local s_`grp'  `s_`grp'' s_`stub'
		local si_`grp' `si_`grp'' s_i_`stub'
	}
}

* Number of nonmissing standardized items per row
* NOTE(review): ntreatvars does not count s_sparetime_, although std_treat
* below includes it - confirm this is intended.
egen ntreatvars = rownonmiss(`s_treat_head' `s_treat_tail')
egen ncomvars = rownonmiss(`s_comm_items')

* Standardized communication index
egen std_comm = rowmean(`s_comm_items')

* Communication index based on the wave-2-imputed items
egen std_comm_imp = rowmean(`si_comm_items')

* Row mean of the (unstandardized) imputed employer description items
egen i_nstd_employer = rowmean(i_empl_good_ i_noempl_short_ i_noempl_slaver_ ///
	i_noempl_violent_ i_noempl_strict_)

* Treatment index
egen std_treat = rowmean(`s_treat_head' s_sparetime_ `s_treat_tail')

* Treatment index where missings in wave 3 are replaced with wave 2 values
egen std_treat_si = rowmean(`si_treat_head' s_i_sparetime_ `si_treat_tail')

* Standardized wellbeing index (currently not used)
egen std_well = rowmean(s_happy_3 s_nervous_3 s_downheart_3 s_pain_3 s_homesick_3 s_overwhelm_3)

* Center every index, within wave 2 and wave 3, on the mean of rows with
* gift==0 (restricted to status_3!=5). The indices are only demeaned, not
* rescaled.
foreach idx of varlist std_treat* std_comm* std_well {
	forvalues w = 2/3 {
		summarize `idx' if gift==0 & wave==`w' & status_3!=5
		replace `idx' = `idx' - r(mean) if wave==`w' & status_3!=5
	}
}

* Log of the remitted amount (1 is added before taking the log)
* NOTE(review): no variable is created under the exact name amount_remit
* in this file; it presumably resolves as an abbreviation of the reshaped
* amount_remit_ - confirm and spell the name out.
gen l_amount_remit = log(amount_remit + 1)

* Set missing direct_3 to 0 (needed for the imputation with wave 2 data).
* All system missings were recoded to .c earlier in this file, so a rule
* for "." alone would match nothing; "missing" covers ., .a, ..., .z.
recode direct_3 (missing=0)

* type_int needs no further treatment here: its missing values were
* already set to 4 right after the reshape.

* Indicators for an existing household interview in wave 3 / wave 1
gen hh_int_3 = hh_duration_3<.
gen hh_int_1 = hh_duration_1<.

* Dichotomize the "child should become OFW" items.
* NOTE(review): the two recodes run in opposite directions (4/5 -> 1 for
* the DW item, 1/2 -> 1 for the household item) - confirm that the two
* answer scales are indeed reversed.
recode child_ofw_3 (1 2 3=0) (4 5=1)
recode hh_k1_e_fam_child_ofw_3 (1 2 =1) (3 4 5=0), generate(hh_child_ofw_3)
lab val child_ofw_3 hh_child_ofw_3 l_yesno

* Indicator for either a direct or a proxy interview in wave 3
gen directorproxy_3=(direct_3==1 | proxy_3==1)
* full variable name, so the statement does not depend on abbreviation
label var directorproxy_3 "Dummy 1: if either direct or proxy interwiev"

* Successful re-interview with HH: 1 whenever hh_duration_3 is not
* missing (any missing code counts as no interview, consistent with
* hh_int_3 above)
gen successint_hh_3=!missing(hh_duration_3)
label var successint_hh_3 "Dummy 1: if Successful re-interview with HH "

* Plans to stay with the initial employer: 1 for those who are with the
* initial employer and plan to stay with the current employer, 0 otherwise,
* missing where status_3_b1 is missing
gen status_3_c1 = status_3_b1==1 & stay_empl_3==1 if status_3_b1<.
label var status_3_c1 "Dummy 1: if plans to stay with initial employer"

save "$data_prep/owwa_data_merged_long", replace



